#ifndef CAFFE_COMMON_HPP_
#define CAFFE_COMMON_HPP_

#include <boost/shared_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>

#include <cmath>
#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>  // pair
#include <vector>

#include "caffe/util/device_alternate.hpp"

// gflags 2.1 issue: namespace google was changed to gflags without warning.
// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
// 2.1. If it is not, we add a temporary alias so that the gflags namespace
// still resolves to the old google namespace.
// TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's
// remove the following hack.
#if !defined(GFLAGS_GFLAGS_H_)
// GFLAGS_GFLAGS_H_ is not defined: make `gflags::` refer to namespace google.
namespace gflags = google;
#endif  // !defined(GFLAGS_GFLAGS_H_)

// Makes a class non-copyable: declares (without defining) its copy constructor
// and copy-assignment operator under `private:`.
// Place it last in the class body, since everything that follows the macro
// becomes private. The expansion has no trailing semicolon; the caller adds it.
#define DISABLE_COPY_AND_ASSIGN(TypeName) \
 private:                                 \
  TypeName(const TypeName&);              \
  TypeName& operator=(const TypeName&)

// Explicitly instantiates the class template `TemplateName` for float and for
// double. The expansion has no trailing semicolon; the caller adds it.
#define INSTANTIATE_CLASS(TemplateName) \
  template class TemplateName<float>;   \
  template class TemplateName<double>

// Marker for code paths that have no implementation yet: when the statement
// is executed it emits a FATAL log entry with the message below.
#define NOT_IMPLEMENTED \
  LOG(FATAL) << "Not Implemented Yet"

namespace caffe {

// We will use the boost shared_ptr instead of the new C++11 one mainly
// because cuda does not work (at least now) well with C++11 features.
// Inside namespace caffe, an unqualified `shared_ptr` therefore means
// boost::shared_ptr.
using boost::shared_ptr;

// Frequently used std names, imported so they can be written unqualified.

// Containers and pairs.
using std::map;
using std::set;
using std::vector;
using std::pair;
using std::make_pair;

// Strings and streams.
using std::string;
using std::stringstream;
using std::ostringstream;
using std::fstream;
using std::ios;

// Miscellaneous.
using std::isnan;
using std::iterator;

// A global initialization function that you should call in your main function.
// Currently it initializes google flags and google logging.
// The definition is not in this header.
// pargc / pargv: presumably pointers to main()'s argc and argv, passed by
// pointer so the callee can modify them - TODO confirm against the definition.
void GlobalInit(int* pargc, char*** pargv);

// Process-wide holder for state shared across Caffe: compute mode, phase, GPU
// mode, device ids, the RNG and (unless CPU_ONLY is defined) the cublas /
// curand / cuda stream handles for a master and a slave device.
// The single instance is created lazily by Get(); every static accessor below
// reads or writes that instance.
// NOTE(review): Get() performs no locking in this header - confirm that the
// first call happens from a single thread.
class Caffe {
 public:
  ~Caffe();

  // Returns the shared instance, constructing it on the first call.
  static Caffe& Get() {
    if (singleton_.get() == NULL) {
      singleton_.reset(new Caffe());
    }
    return *singleton_;
  }

  enum Brew {
    CPU,
    GPU
  };
  enum Phase {
    TRAIN,
    TEST
  };
  enum GPU_MODE {
    SINGLE,
    MASTER_SLAVE,
    PARALLEL
  };
  // NOTE by zhirong: all the master and slave configuration and functions are
  // provided only for cublas. For cudnn, additional code is added over there.
  // This is to keep the caffe main code clean.

  // Random number generator facade that hides the boost and CUDA rng
  // implementations from one another (for cross-platform compatibility).
  class RNG {
   public:
    RNG();
    explicit RNG(unsigned int seed);
    explicit RNG(const RNG&);
    RNG& operator=(const RNG&);
    // Opaque pointer to the underlying generator; the concrete type is only
    // known to the implementation file.
    void* generator();

   private:
    class Generator;
    shared_ptr<Generator> generator_;
  };

  // Returns the shared RNG, creating a default-constructed one on first use.
  static RNG& rng_stream() {
    Caffe& self = Get();
    if (!self.random_generator_) {
      self.random_generator_.reset(new RNG());
    }
    return *(self.random_generator_);
  }

#ifndef CPU_ONLY
  // Plain getters for the master / slave cublas, curand and stream handles.
  static cublasHandle_t cublas_handle() {
    return Get().cublas_handle_;
  }
  static cublasHandle_t slave_cublas_handle() {
    return Get().slave_cublas_handle_;
  }
  static curandGenerator_t curand_generator() {
    return Get().curand_generator_;
  }
  static curandGenerator_t slave_curand_generator() {
    return Get().slave_curand_generator_;
  }
  static cudaStream_t cu_stream() {
    return Get().cu_stream_;
  }
  static cudaStream_t slave_cu_stream() {
    return Get().slave_cu_stream_;
  }

  // Plain getters for the stored master / slave device ids.
  static int master_device_id() {
    return Get().master_device_id_;
  }
  static int slave_device_id() {
    return Get().slave_device_id_;
  }

  // Switch between master and slave device: record the chosen id as the
  // current device, then make it the active CUDA device.
  static void switch_to_master_device() {
    Caffe& self = Get();
    self.current_device_id_ = self.master_device_id_;
    CUDA_CHECK(cudaSetDevice(self.master_device_id_));
  }
  static void switch_to_slave_device() {
    Caffe& self = Get();
    self.current_device_id_ = self.slave_device_id_;
    CUDA_CHECK(cudaSetDevice(self.slave_device_id_));
  }

  // Device id recorded by the last switch_to_*_device() call (or whatever
  // else assigned current_device_id_ outside this header).
  static int get_current_device_id() {
    return Get().current_device_id_;
  }

  // The three get_current_*() accessors share one rule. In MASTER_SLAVE mode
  // they return the master resource when the current device id equals the
  // master id, otherwise the slave resource when it equals the slave id (the
  // master check comes first). In every other situation, including any GPU
  // mode other than MASTER_SLAVE, they log a FATAL message.
  static cublasHandle_t get_current_cublas_handle() {
    Caffe& self = Get();
    if (self.gpu_mode_ == MASTER_SLAVE) {
      if (self.current_device_id_ == self.master_device_id_) {
        return self.cublas_handle_;
      }
      if (self.current_device_id_ == self.slave_device_id_) {
        return self.slave_cublas_handle_;
      }
    }
    // TODO: error handling
    LOG(FATAL) << "MASTER & SLAVE device not set properly";
    return NULL;
  }
  static curandGenerator_t get_current_curand_generator() {
    Caffe& self = Get();
    if (self.gpu_mode_ == MASTER_SLAVE) {
      if (self.current_device_id_ == self.master_device_id_) {
        return self.curand_generator_;
      }
      if (self.current_device_id_ == self.slave_device_id_) {
        return self.slave_curand_generator_;
      }
    }
    // TODO: error handling
    LOG(FATAL) << "MASTER & SLAVE device not set properly";
    return NULL;
  }
  static cudaStream_t get_current_cu_stream() {
    Caffe& self = Get();
    if (self.gpu_mode_ == MASTER_SLAVE) {
      if (self.current_device_id_ == self.master_device_id_) {
        return self.cu_stream_;
      }
      if (self.current_device_id_ == self.slave_device_id_) {
        return self.slave_cu_stream_;
      }
    }
    // TODO: error handling
    LOG(FATAL) << "MASTER & SLAVE device not set properly";
    return NULL;
  }
#endif  // CPU_ONLY

  // Current compute mode (CPU or GPU).
  static Brew mode() {
    return Get().mode_;
  }
  // Current phase (TRAIN or TEST).
  static Phase phase() {
    return Get().phase_;
  }
  // Current GPU mode.
  static GPU_MODE gpu_mode() {
    return Get().gpu_mode_;
  }

  // Setters.
  // Sets the compute mode. Changing the mode halfway through the program is
  // discouraged: pinned memory might then be freed in a non-pinned way, which
  // may cause problems (noted by the original author as unverified).
  static void set_mode(Brew mode) {
    Get().mode_ = mode;
  }
  // Sets the phase.
  static void set_phase(Phase phase) {
    Get().phase_ = phase;
  }
  // Sets the GPU mode.
  static void set_gpu_mode(GPU_MODE gpu_mode) {
    Get().gpu_mode_ = gpu_mode;
  }
  // Sets the random seed of both boost and curand.
  static void set_random_seed(const unsigned int seed);
  // Sets the device. Because cublas and curand state is held here, setting
  // the device also requires resetting those values.
  static void SetDevice(const int device_id);
  static void SetSlaveDevice(const int slave_device_id);
  // Prints the current GPU status.
  static void DeviceQuery();
  static void SlaveDeviceQuery(const int slave_device_id);

  static void ConnectMasterSlaveDevice(const int master_device_id,
                                       const int slave_device_id);

  // Accumulate flag, added to allow a bigger batch size.
  static void set_accumulate(bool acum) {
    Get().accumulate_ = acum;
  }
  static bool accumulate() {
    return Get().accumulate_;
  }

 protected:
#ifndef CPU_ONLY
  curandGenerator_t curand_generator_;
  cudaStream_t cu_stream_;
  curandGenerator_t slave_curand_generator_;
  cudaStream_t slave_cu_stream_;
  cublasHandle_t cublas_handle_;
  cublasHandle_t slave_cublas_handle_;
  shared_ptr<RNG> slave_random_generator_;
#endif  // CPU_ONLY
  shared_ptr<RNG> random_generator_;
  // Added to allow a bigger batch size.
  bool accumulate_;
  Brew mode_;
  Phase phase_;
  GPU_MODE gpu_mode_;

  int master_device_id_;
  int slave_device_id_;
  int current_device_id_;
  // Storage for the lazily created instance returned by Get().
  static shared_ptr<Caffe> singleton_;

 private:
  // Private so that instances can only be created through Get().
  Caffe();

  DISABLE_COPY_AND_ASSIGN(Caffe);
};

}  // namespace caffe

#endif  // CAFFE_COMMON_HPP_
